Load dataset

# Attach the package that provides the Ames housing data
library(AmesHousing)
# make_ames() builds the data set; store it as `ames`
ames <- make_ames()

# Print the data set itself
ames
# head() gives the first six observations: a quick check that every variable
# you want is in the data set, and a look at the type of each variable
head(ames)
# str() lists every column together with its type and first few values
str(ames)
tibble [2,930 × 81] (S3: tbl_df/tbl/data.frame)
 $ MS_SubClass       : Factor w/ 16 levels "One_Story_1946_and_Newer_All_Styles",..: 1 1 1 1 6 6 12 12 12 6 ...
 $ MS_Zoning         : Factor w/ 7 levels "Floating_Village_Residential",..: 3 2 3 3 3 3 3 3 3 3 ...
 $ Lot_Frontage      : num [1:2930] 141 80 81 93 74 78 41 43 39 60 ...
 $ Lot_Area          : int [1:2930] 31770 11622 14267 11160 13830 9978 4920 5005 5389 7500 ...
 $ Street            : Factor w/ 2 levels "Grvl","Pave": 2 2 2 2 2 2 2 2 2 2 ...
 $ Alley             : Factor w/ 3 levels "Gravel","No_Alley_Access",..: 2 2 2 2 2 2 2 2 2 2 ...
 $ Lot_Shape         : Factor w/ 4 levels "Regular","Slightly_Irregular",..: 2 1 2 1 2 2 1 2 2 1 ...
 $ Land_Contour      : Factor w/ 4 levels "Bnk","HLS","Low",..: 4 4 4 4 4 4 4 2 4 4 ...
 $ Utilities         : Factor w/ 3 levels "AllPub","NoSeWa",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ Lot_Config        : Factor w/ 5 levels "Corner","CulDSac",..: 1 5 1 1 5 5 5 5 5 5 ...
 $ Land_Slope        : Factor w/ 3 levels "Gtl","Mod","Sev": 1 1 1 1 1 1 1 1 1 1 ...
 $ Neighborhood      : Factor w/ 29 levels "North_Ames","College_Creek",..: 1 1 1 1 7 7 17 17 17 7 ...
 $ Condition_1       : Factor w/ 9 levels "Artery","Feedr",..: 3 2 3 3 3 3 3 3 3 3 ...
 $ Condition_2       : Factor w/ 8 levels "Artery","Feedr",..: 3 3 3 3 3 3 3 3 3 3 ...
 $ Bldg_Type         : Factor w/ 5 levels "OneFam","TwoFmCon",..: 1 1 1 1 1 1 5 5 5 1 ...
 $ House_Style       : Factor w/ 8 levels "One_and_Half_Fin",..: 3 3 3 3 8 8 3 3 3 8 ...
 $ Overall_Qual      : Factor w/ 10 levels "Very_Poor","Poor",..: 6 5 6 7 5 6 8 8 8 7 ...
 $ Overall_Cond      : Factor w/ 10 levels "Very_Poor","Poor",..: 5 6 6 5 5 6 5 5 5 5 ...
 $ Year_Built        : int [1:2930] 1960 1961 1958 1968 1997 1998 2001 1992 1995 1999 ...
 $ Year_Remod_Add    : int [1:2930] 1960 1961 1958 1968 1998 1998 2001 1992 1996 1999 ...
 $ Roof_Style        : Factor w/ 6 levels "Flat","Gable",..: 4 2 4 4 2 2 2 2 2 2 ...
 $ Roof_Matl         : Factor w/ 8 levels "ClyTile","CompShg",..: 2 2 2 2 2 2 2 2 2 2 ...
 $ Exterior_1st      : Factor w/ 16 levels "AsbShng","AsphShn",..: 4 14 15 4 14 14 6 7 6 14 ...
 $ Exterior_2nd      : Factor w/ 17 levels "AsbShng","AsphShn",..: 11 15 16 4 15 15 6 7 6 15 ...
 $ Mas_Vnr_Type      : Factor w/ 5 levels "BrkCmn","BrkFace",..: 5 4 2 4 4 2 4 4 4 4 ...
 $ Mas_Vnr_Area      : num [1:2930] 112 0 108 0 0 20 0 0 0 0 ...
 $ Exter_Qual        : Factor w/ 4 levels "Excellent","Fair",..: 4 4 4 3 4 4 3 3 3 4 ...
 $ Exter_Cond        : Factor w/ 5 levels "Excellent","Fair",..: 5 5 5 5 5 5 5 5 5 5 ...
 $ Foundation        : Factor w/ 6 levels "BrkTil","CBlock",..: 2 2 2 2 3 3 3 3 3 3 ...
 $ Bsmt_Qual         : Factor w/ 6 levels "Excellent","Fair",..: 6 6 6 6 3 6 3 3 3 6 ...
 $ Bsmt_Cond         : Factor w/ 6 levels "Excellent","Fair",..: 3 6 6 6 6 6 6 6 6 6 ...
 $ Bsmt_Exposure     : Factor w/ 5 levels "Av","Gd","Mn",..: 2 4 4 4 4 4 3 4 4 4 ...
 $ BsmtFin_Type_1    : Factor w/ 7 levels "ALQ","BLQ","GLQ",..: 2 6 1 1 3 3 3 1 3 7 ...
 $ BsmtFin_SF_1      : num [1:2930] 2 6 1 1 3 3 3 1 3 7 ...
 $ BsmtFin_Type_2    : Factor w/ 7 levels "ALQ","BLQ","GLQ",..: 7 4 7 7 7 7 7 7 7 7 ...
 $ BsmtFin_SF_2      : num [1:2930] 0 144 0 0 0 0 0 0 0 0 ...
 $ Bsmt_Unf_SF       : num [1:2930] 441 270 406 1045 137 ...
 $ Total_Bsmt_SF     : num [1:2930] 1080 882 1329 2110 928 ...
 $ Heating           : Factor w/ 6 levels "Floor","GasA",..: 2 2 2 2 2 2 2 2 2 2 ...
 $ Heating_QC        : Factor w/ 5 levels "Excellent","Fair",..: 2 5 5 1 3 1 1 1 1 3 ...
 $ Central_Air       : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 2 2 2 2 ...
 $ Electrical        : Factor w/ 6 levels "FuseA","FuseF",..: 5 5 5 5 5 5 5 5 5 5 ...
 $ First_Flr_SF      : int [1:2930] 1656 896 1329 2110 928 926 1338 1280 1616 1028 ...
 $ Second_Flr_SF     : int [1:2930] 0 0 0 0 701 678 0 0 0 776 ...
 $ Low_Qual_Fin_SF   : int [1:2930] 0 0 0 0 0 0 0 0 0 0 ...
 $ Gr_Liv_Area       : int [1:2930] 1656 896 1329 2110 1629 1604 1338 1280 1616 1804 ...
 $ Bsmt_Full_Bath    : num [1:2930] 1 0 0 1 0 0 1 0 1 0 ...
 $ Bsmt_Half_Bath    : num [1:2930] 0 0 0 0 0 0 0 0 0 0 ...
 $ Full_Bath         : int [1:2930] 1 1 1 2 2 2 2 2 2 2 ...
 $ Half_Bath         : int [1:2930] 0 0 1 1 1 1 0 0 0 1 ...
 $ Bedroom_AbvGr     : int [1:2930] 3 2 3 3 3 3 2 2 2 3 ...
 $ Kitchen_AbvGr     : int [1:2930] 1 1 1 1 1 1 1 1 1 1 ...
 $ Kitchen_Qual      : Factor w/ 5 levels "Excellent","Fair",..: 5 5 3 1 5 3 3 3 3 3 ...
 $ TotRms_AbvGrd     : int [1:2930] 7 5 6 8 6 7 6 5 5 7 ...
 $ Functional        : Factor w/ 8 levels "Maj1","Maj2",..: 8 8 8 8 8 8 8 8 8 8 ...
 $ Fireplaces        : int [1:2930] 2 0 0 2 1 1 0 0 1 1 ...
 $ Fireplace_Qu      : Factor w/ 6 levels "Excellent","Fair",..: 3 4 4 6 6 3 4 4 6 6 ...
 $ Garage_Type       : Factor w/ 7 levels "Attchd","Basment",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ Garage_Finish     : Factor w/ 4 levels "Fin","No_Garage",..: 1 4 4 1 1 1 1 3 3 1 ...
 $ Garage_Cars       : num [1:2930] 2 1 1 2 2 2 2 2 2 2 ...
 $ Garage_Area       : num [1:2930] 528 730 312 522 482 470 582 506 608 442 ...
 $ Garage_Qual       : Factor w/ 6 levels "Excellent","Fair",..: 6 6 6 6 6 6 6 6 6 6 ...
 $ Garage_Cond       : Factor w/ 6 levels "Excellent","Fair",..: 6 6 6 6 6 6 6 6 6 6 ...
 $ Paved_Drive       : Factor w/ 3 levels "Dirt_Gravel",..: 2 3 3 3 3 3 3 3 3 3 ...
 $ Wood_Deck_SF      : int [1:2930] 210 140 393 0 212 360 0 0 237 140 ...
 $ Open_Porch_SF     : int [1:2930] 62 0 36 0 34 36 0 82 152 60 ...
 $ Enclosed_Porch    : int [1:2930] 0 0 0 0 0 0 170 0 0 0 ...
 $ Three_season_porch: int [1:2930] 0 0 0 0 0 0 0 0 0 0 ...
 $ Screen_Porch      : int [1:2930] 0 120 0 0 0 0 0 144 0 0 ...
 $ Pool_Area         : int [1:2930] 0 0 0 0 0 0 0 0 0 0 ...
 $ Pool_QC           : Factor w/ 5 levels "Excellent","Fair",..: 4 4 4 4 4 4 4 4 4 4 ...
 $ Fence             : Factor w/ 5 levels "Good_Privacy",..: 5 3 5 5 3 5 5 5 5 5 ...
 $ Misc_Feature      : Factor w/ 6 levels "Elev","Gar2",..: 3 3 2 3 3 3 3 3 3 3 ...
 $ Misc_Val          : int [1:2930] 0 0 12500 0 0 0 0 0 0 0 ...
 $ Mo_Sold           : int [1:2930] 5 6 6 4 3 6 4 1 3 6 ...
 $ Year_Sold         : int [1:2930] 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 ...
 $ Sale_Type         : Factor w/ 10 levels "COD","Con","ConLD",..: 10 10 10 10 10 10 10 10 10 10 ...
 $ Sale_Condition    : Factor w/ 6 levels "Abnorml","AdjLand",..: 5 5 5 5 5 5 5 5 5 5 ...
 $ Sale_Price        : int [1:2930] 215000 105000 172000 244000 189900 195500 213500 191500 236500 189000 ...
 $ Longitude         : num [1:2930] -93.6 -93.6 -93.6 -93.6 -93.6 ...
 $ Latitude          : num [1:2930] 42.1 42.1 42.1 42.1 42.1 ...
 - attr(*, "spec")=List of 2
  ..$ cols   :List of 82
  .. ..$ Order          : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ PID            : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ MS SubClass    : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ MS Zoning      : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Lot Frontage   : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Lot Area       : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Street         : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Alley          : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Lot Shape      : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Land Contour   : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Utilities      : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Lot Config     : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Land Slope     : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Neighborhood   : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Condition 1    : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Condition 2    : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Bldg Type      : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ House Style    : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Overall Qual   : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Overall Cond   : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Year Built     : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Year Remod/Add : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Roof Style     : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Roof Matl      : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Exterior 1st   : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Exterior 2nd   : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Mas Vnr Type   : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Mas Vnr Area   : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Exter Qual     : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Exter Cond     : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Foundation     : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Bsmt Qual      : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Bsmt Cond      : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Bsmt Exposure  : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ BsmtFin Type 1 : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ BsmtFin SF 1   : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ BsmtFin Type 2 : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ BsmtFin SF 2   : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Bsmt Unf SF    : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Total Bsmt SF  : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Heating        : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Heating QC     : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Central Air    : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Electrical     : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ 1st Flr SF     : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ 2nd Flr SF     : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Low Qual Fin SF: list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Gr Liv Area    : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Bsmt Full Bath : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Bsmt Half Bath : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Full Bath      : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Half Bath      : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Bedroom AbvGr  : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Kitchen AbvGr  : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Kitchen Qual   : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ TotRms AbvGrd  : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Functional     : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Fireplaces     : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Fireplace Qu   : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Garage Type    : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Garage Yr Blt  : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Garage Finish  : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Garage Cars    : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Garage Area    : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Garage Qual    : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Garage Cond    : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Paved Drive    : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Wood Deck SF   : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Open Porch SF  : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Enclosed Porch : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ 3Ssn Porch     : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Screen Porch   : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Pool Area      : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Pool QC        : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Fence          : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Misc Feature   : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Misc Val       : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Mo Sold        : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Yr Sold        : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  .. ..$ Sale Type      : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ Sale Condition : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
  .. ..$ SalePrice      : list()
  .. .. ..- attr(*, "class")= chr [1:2] "collector_integer" "collector"
  ..$ default: list()
  .. ..- attr(*, "class")= chr [1:2] "collector_guess" "collector"
  ..- attr(*, "class")= chr "col_spec"
# gives you the structure of the data 
# Data types - how the data are stored in R
# Levels are categories

Min and Max of data

# use $ and variable name
min(ames$Lot_Frontage)
[1] 0
max(ames$Lot_Frontage)
[1] 313
ames

Range

range(ames$Lot_Area)
[1]   1300 215245

Mean

mean(ames$Lot_Area)
[1] 10147.92

Median

median(ames$Lot_Area)
[1] 9436.5
## can also use quantile function
quantile(ames$Lot_Area, 0.5)
   50% 
9436.5 
quantile(ames$Lot_Area, 0.25)
    25% 
7440.25 
# works for any quantile, e.g. 0.75 for the third quartile

Interquartile Range

# difference between first and third quartiles
IQR(ames$Lot_Area)
[1] 4115

Standard Dev and Variance

sd(ames$Lot_Area)
[1] 7880.018
var(ames$Lot_Area)
[1] 62094680
## can also apply to multiple columns 

# can also select the columns by name: ames[, c("Lot_Frontage", "Lot_Area")]


lapply(ames[, 3:4], sd)
$Lot_Frontage
[1] 33.49944

$Lot_Area
[1] 7880.018
# remember that lapply applies a function to each element of its first argument (here, each selected column) and returns a list --> i.e. lapply(data you want, function you want)

Really helpful summary function

summary(ames[, 3:4])
  Lot_Frontage       Lot_Area     
 Min.   :  0.00   Min.   :  1300  
 1st Qu.: 43.00   1st Qu.:  7440  
 Median : 63.00   Median :  9436  
 Mean   : 57.65   Mean   : 10148  
 3rd Qu.: 78.00   3rd Qu.: 11555  
 Max.   :313.00   Max.   :215245  

Mode

Base R has no built-in function for the statistical mode of a variable (`mode()` returns an object's storage mode instead). Here’s one way to find it: tabulate the values and sort the counts.

# Count how many times each distinct Lot_Area value occurs
table_ames <- table(ames$Lot_Area) # number of occurrences for each unique value
# Order the counts from most to least frequent, so the mode is listed first
sort(table_ames, decreasing = TRUE) # sort highest to lowest

# although the printout is long, its first entry shows that the mode for lot area is 9600

Counts

summary(ames$Bldg_Type)
  OneFam TwoFmCon   Duplex    Twnhs   TwnhsE 
    2425       62      109      101      233 
# gives you the number of observations in each level of the factor

Conditional Counts for Nominal/Ordinal Variables

# let's say we want to count the houses in each Paved_Drive category
summary(ames$Paved_Drive)
     Dirt_Gravel Partial_Pavement            Paved 
             216               62             2652 
# another way to count the number of paved 
sum(ames$Paved_Drive == "Paved")
[1] 2652
# or condition 
sum(ames$Paved_Drive == "Paved" | ames$Paved_Drive == "Dirt_Gravel")
[1] 2868
# and condition 

sum(ames$Paved_Drive == "Paved" & ames$Paved_Drive == "Dirt_Gravel")
[1] 0
sum(ames$Paved_Drive == "Paved" & ames$Alley == "No_Alley_Access")
[1] 2518

Conditional Counts for Numeric Variables

# find the number of lots greater than 1000
sum(ames$Lot_Area > 1000, na.rm=TRUE)
[1] 2930
# na.rm = TRUE removes missing values for you 
# find the number of lots between 1000 and 2000
sum(ames$Lot_Area > 1000 & ames$Lot_Area < 2000, na.rm=TRUE)
[1] 57
library(dplyr)
# Flag lots whose area lies strictly between 10000 and 20000 (1 = yes, 0 = no)
# and move the new indicator column to the front of the data set.
# NOTE: where() takes a predicate function that is applied to whole columns
# (e.g. where(is.numeric)); it cannot test row values, which is why
# select(filtered_area, where(filtered_area == 1)) fails with
# "object 'filtered_area' not found". Rows are subset with filter(), not select().
ames2 <- ames %>%
  mutate(
    filtered_area = ifelse(Lot_Area > 10000 & Lot_Area < 20000, 1, 0)
  ) %>%
  select(filtered_area, everything())
library(dplyr)
# Flag lots with an area strictly between 10000 and 20000 (1 = yes, 0 = no),
# then keep only the flagged rows
ames2 <- ames %>%
  mutate(
    filtered_area = ifelse(Lot_Area > 10000 & Lot_Area < 20000, 1, 0)
  ) %>%
  filter(filtered_area == 1)
ames2
# ames2 now contains only the lots that meet the condition; the new
# filtered_area column (equal to 1 in every remaining row) is the last column

Barplot

barplot(table(ames$Lot_Shape)) # table() is mandatory

# really basic automatic barplot in R 
# more fancy, customizable barplot in R
library(ggplot2)

# Bar chart counting houses per lot shape, with one fill colour per shape;
# labs() sets the title and both axis labels in a single call
ggplot(data = ames, mapping = aes(x = Lot_Shape, fill = Lot_Shape)) +
  geom_bar() +
  labs(
    title = "Lot Shape of Houses in Ames",
    x = "Lot Shape",
    y = "Number of Houses"
  )

NA

Line Plot

# type = "l" draws the Lot_Area values as a connected line ("l" for line)
plot(ames$Lot_Area, type = "l")

Histogram

hist(ames$Lot_Frontage)

# The same histogram drawn with ggplot2; `bins` sets the number of bars
ggplot(ames, aes(x = Lot_Frontage)) +
  geom_histogram(bins = 15)

# can change number of bins 

Boxplot

boxplot(ames$Lot_Frontage)

# side-by-side comparison of a numeric variable across the levels of a
# categorical one. Passing data = ames lets the formula use bare column names,
# so any labels derived from it read "Lot_Frontage" and "Alley" instead of
# "ames$Lot_Frontage" and "ames$Alley".
boxplot(Lot_Frontage ~ Alley, data = ames)

# ggplot2 version: one box of Lot_Frontage for each Alley level
ggplot(ames, aes(x = Alley, y = Lot_Frontage)) +
  geom_boxplot()

Scatterplot

ames
plot(ames$Lot_Area, ames$Gr_Liv_Area)
# Scatterplot of above-ground living area against lot area.
# Inside aes() refer to columns by bare name: ggplot2 looks them up in `data`.
# Writing ames$Lot_Area there bypasses `data`, labels the axis "ames$Lot_Area",
# and breaks as soon as the plot is facetted or given a different data set.
ggplot(ames, aes(x = Lot_Area, y = Gr_Liv_Area)) +
  geom_point()
# add in a categorical factor for more info: colour the points by Alley
ggplot(ames, aes(x = Lot_Area, y = Gr_Liv_Area, color = Alley)) +
  geom_point() +
  scale_color_hue()

QQplot for Normality Assumptions

# Draw points on the qq-plot:
qqnorm(ames$Lot_Area)
# Draw the reference line:
qqline(ames$Lot_Area)

We can see that the points deviate substantially from the reference line (where normally distributed data would fall), so the normality assumption is violated for Lot_Area.

Density plots for distribution

plot(density(ames$Lot_Area))
LS0tCnRpdGxlOiAiRGVzY3JpcHRpdmUgU3RhdHMiCm91dHB1dDoKICBodG1sX2RvY3VtZW50OgogICAgZGZfcHJpbnQ6IHBhZ2VkCi0tLQogCiMjIFVwbG9hZCBkYXRhc2V0IAoKYGBge3J9CmxpYnJhcnkoQW1lc0hvdXNpbmcpCmFtZXMgPSBtYWtlX2FtZXMoKQoKYW1lcyAKYGBgCgpgYGB7cn0KaGVhZChhbWVzKQojIGdpdmVzIGZpcnN0IHNpeCBvYnNlcnZhdGlvbnMgCiMgZ29vZCBmb3IgY2hlY2tpbmcgdGhhdCBhbGwgdmFyaWFibGVzIHlvdSB3YW50IGFyZSBpbiBkYXRhIHNldCBhbmQgZm9yIGxvb2tpbmcgYXQgdHlwZSBvZiB2YXJpYWJsZXMgCmBgYAoKYGBge3J9CnN0cihhbWVzKQojIGdpdmVzIHlvdSB0aGUgc3RydWN0dXJlIG9mIHRoZSBkYXRhIAojIERhdGEgdHlwZXMgLSBob3cgdGhlIGRhdGEgYXJlIHN0b3JlZCBpbiBSCiMgTGV2ZWxzIGFyZSBjYXRlZ29yaWVzCmBgYAoKCiMjIE1pbiBhbmQgTWF4IG9mIGRhdGEgCgpgYGB7cn0KIyB1c2UgJCBhbmQgdmFyaWFibGUgbmFtZQptaW4oYW1lcyRMb3RfRnJvbnRhZ2UpCm1heChhbWVzJExvdF9Gcm9udGFnZSkKYW1lcwpgYGAKCiMjIFJhbmdlIAoKYGBge3J9CnJhbmdlKGFtZXMkTG90X0FyZWEpCmBgYAoKIyMgTWVhbiAKCmBgYHtyfQptZWFuKGFtZXMkTG90X0FyZWEpCmBgYAoKIyMgTWVkaWFuIAoKYGBge3J9Cm1lZGlhbihhbWVzJExvdF9BcmVhKQpgYGAKCmBgYHtyfQojIyBjYW4gYWxzbyB1c2UgcXVhbnRpbGUgZnVuY3Rpb24KcXVhbnRpbGUoYW1lcyRMb3RfQXJlYSwgMC41KQpxdWFudGlsZShhbWVzJExvdF9BcmVhLCAwLjI1KQojIGNhbiBkbyB3aXRoIGFueSBxdWFydGlsZXMgCmBgYAogCiMjIEludGVycXVhcnRpbGUgUmFuZ2UgCgpgYGB7cn0KIyBkaWZmZXJlbmNlIGJldHdlZW4gZmlyc3QgYW5kIHRoaXJkIHF1YXJ0aWxlcwpJUVIoYW1lcyRMb3RfQXJlYSkKYGBgCgojIyBTdGFuZGFyZCBEZXYgYW5kIFZhcmlhbmNlIAoKYGBge3J9CnNkKGFtZXMkTG90X0FyZWEpCnZhcihhbWVzJExvdF9BcmVhKQpgYGAKCmBgYHtyfQojIyBjYW4gYWxzbyBhcHBseSB0byBtdWx0aXBsZSBjb2x1bW5zIAoKIyBjYW4gYWxzbyB0cnkgOiBhbWVzWyxjKOKAnGxvdF9hcmVh4oCdLOKAnWxvdF9mcm9udGFnZSIpCgoKbGFwcGx5KGFtZXNbLCAzOjRdLCBzZCkKIyByZW1lbWJlciB0aGF0IGxhcHBseSBmdW5jdGlvbiBhcHBsaWVzIGEgc3BlY2lmaWMgZnVuY3Rpb24gdG8gZGF0YSAtLT4gaS5lLiAoZGF0YSB5b3Ugd2FudCwgZnVuY3Rpb24geW91IHdhbnQpCmBgYAoKIyMgUmVhbGx5IGhlbHBmdWwgc3VtbWFyeSBmdW5jdGlvbgoKYGBge3J9CnN1bW1hcnkoYW1lc1ssIDM6NF0pCmBgYAoKIyMgTW9kZSAKClVuZm9ydHVuYXRlbHksIHRoZXJlIGlzIG5vIGZ1bmN0aW9uIGluIFIgdG8gZmluZCB0aGUgbW9kZSBvZiBhIHZhcmlhYmxlICh0aGF0IEkga25vdyBvZikuIEhlcmUncyBvbmUgd2F5IEkga25vdyBvZiB0byBkbyBpdC4KCmBgYHtyfQp0YWJsZV9hbWVzIDwtIHRhYmxlKGFtZXMkTG90
X0FyZWEpICMgbnVtYmVyIG9mIG9jY3VycmVuY2VzIGZvciBlYWNoIHVuaXF1ZSB2YWx1ZQpzb3J0KHRhYmxlX2FtZXMsIGRlY3JlYXNpbmcgPSBUUlVFKSAjIHNvcnQgaGlnaGVzdCB0byBsb3dlc3QKCiMgYWx0aG91Z2ggdGhlIHByaW50b3V0IGlzIGxvbmcsIHdlIGNhbiBzZWUgdGhhdCB0aGUgbW9kZSBmb3IgbG90IGFyZWEgaXMgOTYwMApgYGAKCiMjIENvdW50cyAKCmBgYHtyfQpzdW1tYXJ5KGFtZXMkQmxkZ19UeXBlKQojIGdpdmVzIHlvdSB0aGUgbnVtYmVyIG9mIGVhY2ggZmFjdG9yIGluIHRoZSBkYXRhIApgYGAKCiMjIENvbmRpdGlvbmFsIENvdW50cyBmb3IgTm9taW5hbC9PcmRpbmFsIFZhcmlhYmxlcwoKYGBge3J9CiMgbGV0cyBzYXkgd2Ugd2FudCB0byBjb3VudCB0aGUgbnVtYmVyIG9mIGVhY2ggInBhdmVkIgpzdW1tYXJ5KGFtZXMkUGF2ZWRfRHJpdmUpCgojIGFub3RoZXIgd2F5IHRvIGNvdW50IHRoZSBudW1iZXIgb2YgcGF2ZWQgCnN1bShhbWVzJFBhdmVkX0RyaXZlID09ICJQYXZlZCIpCgojIG9yIGNvbmRpdGlvbiAKc3VtKGFtZXMkUGF2ZWRfRHJpdmUgPT0gIlBhdmVkIiB8IGFtZXMkUGF2ZWRfRHJpdmUgPT0gIkRpcnRfR3JhdmVsIikKCiMgYW5kIGNvbmRpdGlvbiAKCnN1bShhbWVzJFBhdmVkX0RyaXZlID09ICJQYXZlZCIgJiBhbWVzJFBhdmVkX0RyaXZlID09ICJEaXJ0X0dyYXZlbCIpCnN1bShhbWVzJFBhdmVkX0RyaXZlID09ICJQYXZlZCIgJiBhbWVzJEFsbGV5ID09ICJOb19BbGxleV9BY2Nlc3MiKQoKYGBgCgojIyBDb25kaXRpb25hbCBDb3VudHMgZm9yIE51bWVyaWMgVmFyaWFibGVzIAoKYGBge3J9CiMgZmluZCB0aGUgbnVtYmVyIG9mIGxvdHMgZ3JlYXRlciB0aGFuIDEwMDAKc3VtKGFtZXMkTG90X0FyZWEgPiAxMDAwLCBuYS5ybT1UUlVFKQojIG5hLnJtID0gVFJVRSByZW1vdmVzIG1pc3NpbmcgdmFsdWVzIGZvciB5b3UgCmBgYAoKYGBge3J9CiMgZmluZCB0aGUgbnVtYmVyIG9mIGxvdHMgYmV0d2VlbiAxMDAwIGFuZCAyMDAwCnN1bShhbWVzJExvdF9BcmVhID4gMTAwMCAmIGFtZXMkTG90X0FyZWEgPCAyMDAwLCBuYS5ybT1UUlVFKQpgYGAKCmBgYHtyfQpsaWJyYXJ5KGRwbHlyKQphbWVzMiA9IGFtZXMgJT4lIG11dGF0ZShmaWx0ZXJlZF9hcmVhID0gaWZlbHNlKExvdF9BcmVhID4gMTAwMDAgJiBMb3RfQXJlYSA8IDIwMDAwLCAxLCAwKSklPiVzZWxlY3QoZmlsdGVyZWRfYXJlYSwgZXZlcnl0aGluZygpKQoKYW1lczIKIyBub3cgd2UgaGF2ZSBhIHZhcmlhYmxlIGF0IHRoZSBlbmQgb2Ygb3VyIGRhdGEgc2V0IHdpdGggdGhpcyBjb25kaXRpb24gcHJlc2VudCAKYGBgCmBgYHtyfQpsaWJyYXJ5KGRwbHlyKQphbWVzMiA9IGFtZXMgJT4lIG11dGF0ZShmaWx0ZXJlZF9hcmVhID0gaWZlbHNlKExvdF9BcmVhID4gMTAwMDAgJiBMb3RfQXJlYSA8IDIwMDAwLCAxLCAwKSklPiVmaWx0ZXIoZmlsdGVyZWRfYXJlYSA9PTEpCmFtZXMyCiMgbm93IHdlIGhhdmUgYSB2YXJpYWJsZSBhdCB0aGUgZW5kIG9m
IG91ciBkYXRhIHNldCB3aXRoIHRoaXMgY29uZGl0aW9uIHByZXNlbnQKYGBgCgojIyBCYXJwbG90IAoKYGBge3J9CmJhcnBsb3QodGFibGUoYW1lcyRMb3RfU2hhcGUpKSAjIHRhYmxlKCkgaXMgbWFuZGF0b3J5CiMgcmVhbGx5IGJhc2ljIGF1dG9tYXRpYyBiYXJwbG90IGluIFIgCmBgYAoKYGBge3J9CiMgbW9yZSBmYW5jeSwgY3VzdG9taXphYmxlIGJhcnBsb3QgaW4gUgpsaWJyYXJ5KGdncGxvdDIpCgpnZ3Bsb3QoYW1lcywgYWVzKHggPSBMb3RfU2hhcGUsIGZpbGwgPSBMb3RfU2hhcGUpKSArCiAgZ2VvbV9iYXIoKSArCiAgZ2d0aXRsZSgiTG90IFNoYXBlIG9mIEhvdXNlcyBpbiBBbWVzIikgKwogIHhsYWIoIkxvdCBTaGFwZSIpICsgeWxhYigiTnVtYmVyIG9mIEhvdXNlcyIpCiAgCmBgYAoKIyMgTGluZSBQbG90IAoKYGBge3J9CnBsb3QoYW1lcyRMb3RfQXJlYSwKICB0eXBlID0gImwiCikgIyAibCIgZm9yIGxpbmUKYGBgCgojIyBIaXN0b2dyYW0gCgpgYGB7cn0KaGlzdChhbWVzJExvdF9Gcm9udGFnZSkKYGBgCgpgYGB7cn0KIyMgY2FuIGRvIHNhbWUgaW4gZ2dwbG90IApnZ3Bsb3QoYW1lcykgKwogIGFlcyh4ID0gTG90X0Zyb250YWdlKSArCiAgZ2VvbV9oaXN0b2dyYW0oYmlucyA9IDE1KQojIGNhbiBjaGFuZ2UgbnVtYmVyIG9mIGJpbnMgCmBgYAoKIyMgQm94cGxvdCAKCmBgYHtyfQpib3hwbG90KGFtZXMkTG90X0Zyb250YWdlKQpgYGAKCmBgYHtyfQojIHNpZGUgYnkgc2lkZSBjb21wYXJpc29uIG9mIG51bWVyaWNhbCB0byBjYXRlZ29yaWNhbCAKYm94cGxvdChhbWVzJExvdF9Gcm9udGFnZSB+IGFtZXMkQWxsZXkpCmBgYAoKYGBge3J9CmdncGxvdChhbWVzKSArCiAgYWVzKHggPSBBbGxleSwgeSA9IExvdF9Gcm9udGFnZSkgKwogIGdlb21fYm94cGxvdCgpCmBgYAoKIyMgU2NhdHRlcnBsb3QgCgoKYGBge3J9CmFtZXMKcGxvdChhbWVzJExvdF9BcmVhLCBhbWVzJEdyX0xpdl9BcmVhKQpgYGAKCmBgYHtyfQpnZ3Bsb3QoYW1lcykgKwogIGFlcyh4ID0gYW1lcyRMb3RfQXJlYSwgeSA9IGFtZXMkR3JfTGl2X0FyZWEpICsKICBnZW9tX3BvaW50KCkKYGBgCmBgYHtyfQojIGFkZCBpbiBhIGNhdGVnb3JpY2FsIGZhY3RvciBmb3IgbW9yZSBpbmZvIApnZ3Bsb3QoYW1lcykgKwogIGFlcyh4ID0gYW1lcyRMb3RfQXJlYSwgeSA9IGFtZXMkR3JfTGl2X0FyZWEsIGNvbG9yID0gQWxsZXkpICsKICBnZW9tX3BvaW50KCkgKwogIHNjYWxlX2NvbG9yX2h1ZSgpCmBgYAoKIyMgUVFwbG90IGZvciBOb3JtYWxpdHkgQXNzdW1wdGlvbnMgCgpgYGB7cn0KIyBEcmF3IHBvaW50cyBvbiB0aGUgcXEtcGxvdDoKcXFub3JtKGFtZXMkTG90X0FyZWEpCiMgRHJhdyB0aGUgcmVmZXJlbmNlIGxpbmU6CnFxbGluZShhbWVzJExvdF9BcmVhKQpgYGAKCldlIGNhbiBzZWUgdGhhdCB0aGlzIGRldmlhdGVzIGEgbG90IGZyb20gbm9ybWFsaXR5ICh0aGUgdGhlb3JldGljYWwgbGluZSBvZiBub3JtYWwgZGF0YSksIHNvIHRoaXMgbm9ybWFsaXR5IGFzc3Vt
cHRpb24gd291bGQgYmUgYnJva2VuLiAKCiMjIERlbnNpdHkgcGxvdHMgZm9yIGRpc3RyaWJ1dGlvbiAKCmBgYHtyfQpwbG90KGRlbnNpdHkoYW1lcyRMb3RfQXJlYSkpCmBgYAoK